import numpy as np
import jieba
import nltk

# Two sample sentences that are segmented into word tokens below.
sA = "这只皮靴号码大了。那只号码合适。"
sB = "这只皮靴号码不小，那只更合适。"

# Each jieba.cut call yields the tokens of one sentence as an iterable.
sA_cut_l = jieba.cut(sA)
# NOTE(review): sB is segmented with cut_all=True while sA uses the default
# mode, so the two token streams are not produced the same way -- confirm
# this asymmetry is intended before comparing the sentences.
sB_cut_l = jieba.cut(sB, cut_all=True)

# Gather every token of sA, followed by every token of sB, into one list.
# list.extend drains each iterable in order, exactly like an append loop,
# without leaving a stray loop variable at module level.
l = []
l.extend(sA_cut_l)
l.extend(sB_cut_l)

#
# sA_cut_d = nltk.FreqDist(sA)
# sB_cut_d = nltk.FreqDist(sB)
#
# print(sA_cut_d)

# t1 = np.array(l1)
# t2 = np.array(l2)
#
#
# def cos_sim(a, b):
#     a_norm = np.linalg.norm(a)
#     b_norm = np.linalg.norm(b)
#     cos = np.dot(a, b) / (a_norm * b_norm)
#     return cos
#
#
# print(cos_sim(t1, t2))
